In [1]:
# import required modules

import warnings
warnings.filterwarnings('ignore')

import sys
stdout = sys.stdout 

import pickle

from fdc import FDC
# from https://github.com/alexandreday/fast_density_clustering

import os
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import sklearn.metrics as metrics

import pickle

import cv2 # OpenCV

from scipy import ndimage as ndi

from skimage.morphology import watershed
from skimage.feature import peak_local_max

from sklearn.manifold import TSNE
from sklearn.metrics import confusion_matrix

from matplotlib import colors
from matplotlib import cm

from sklearn.preprocessing import MinMaxScaler, StandardScaler

from skimage.io import imsave
from skimage.io import imread

from read_roi import read_roi_file
from read_roi import read_roi_zip
# from https://github.com/hadim/read-roi


import seaborn.apionly as sns
sns.set_context("poster")
from IPython.display import display

%matplotlib inline
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
plt.rcParams['svg.fonttype'] = 'none'
/Users/gu6/anaconda/envs/py36/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.
  from numpy.core.umath_tests import inner1d
In [22]:
DIR = '/Users/gu6/GoogleDrive/datascience/work/sec_final/scn'
classes_for_UNet = {'C4':8,'C13':2,'C3':1,'C9':5,'C1':7,'C12':3,'C8':6,'C11':4,'C5':9,'C6':10}
class_map_inv = {v: k for k, v in classes_for_UNet.items()}
modelprobes = ['trpm8','calca','nppb','tmem233','trpa1','fxyd2','mrgprd','s100b']
In [8]:
def binarize(preds):
    # function discretizes the predictions, i.e. for each pixel gives the cell class with the
    # highest probability, as well as some image cleanup
    
    # input are class prediction probabilities with dimensions height x width x (N_CLASSES + 1)
    
    # we take the maximum class for each pixel and encode it as a one-hot array, i.e.
    # we still have a stack of (N_CLASSES + 1) x height x width
    preds = np.argmax(preds,axis=-1)
    preds = (np.arange(preds.max()+1) == preds[...,None]).astype(np.uint8)
    # we then morphologically open all layers except the background layer (0)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6,6));
    for class_ in range(1,11):
        preds[:,:,class_] = cv2.morphologyEx(preds[:,:,class_], cv2.MORPH_OPEN, kernel)
        
    # we redefine the background layer as the leftover from all other layers   
    preds[:,:,0] = np.max(preds[:,:,1:],axis=-1) == 0
    
    
    return preds

def get_contours(pred_bin, kernel = np.ones((2,2))):
    # function performs distance transform watershed segmentation on the individual
    # class layers of a discretized prediction
    
    # returns a list of the openCV contours of the cells and a list of the cell
    # classes (same order)
    contours = []
    classes = []
    
    # we segment each class layer separately
    for class_ in range(1,pred_bin.shape[-1]):
        # get single 2D image of the layer
        opening = pred_bin[:,:,class_]
        # calculate distance transform
        distance = ndi.distance_transform_edt(opening)
        # get local maxima as seeds for watershed
        local_maxi = peak_local_max(distance, indices=False, footprint=np.ones((8, 8)),
                                    labels=opening)
        # slightly dilate binary cells to give us an outline slightly outside the cells
        dilated = cv2.dilate(opening,kernel,iterations = 1)
        
        # make a label map by watershed segmentation
        markers = ndi.label(local_maxi)[0]
        labels = watershed(-distance, markers, mask=dilated)
        
        # translate labels to openCV contours and add class of cell to class list
        for i in range(1,labels.max()+1):
            _, contour,_ = cv2.findContours((labels == i).astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            contours.append(contour[0].astype(int))
            classes.append(class_)
            
    return contours, classes

slide 1

In [39]:
def process(basenames,probes,slide):
    preds = []
    ISHs = []
    for name in basenames:
        ISHs.append(imread(os.path.join(DIR,name + '.tif')))
        preds.append(np.moveaxis(imread(os.path.join(DIR,name + '_pred.tif')),0,-1))
    binpreds = []
    for pred in preds:
        binpreds.append(binarize(pred))
        
    # display binary prediction
    for bin_pred in binpreds:
        colorpred = np.argmax(bin_pred,axis=-1)
        cmap = colors.ListedColormap(['white', 'red','green','blue','magenta','cyan','gold','purple','tan','sienna','darkorange'])    
        sm = matplotlib.cm.ScalarMappable(cmap=cmap)
        rgb = (sm.to_rgba(colorpred)[:,:,:3]* 255).astype(np.uint8) 
        plt.figure(figsize=(21,7))
        plt.imshow(rgb)
        plt.axis('off')
        
    
    # specify dilation kernel for the segmentation function
    kernel = np.array([[0,1,0],[1,1,1],[0,1,0]],dtype=np.uint8)

    # segment all 8 discretized predictions
    means_list = []
    stand_list = []
    norm_list = []
    
    for i,bin_pred in enumerate(binpreds):
        # get contours
        c, cc = get_contours(bin_pred,kernel)
        
        # display contours
        c_img = np.zeros(ISHs[i].shape[1:] + (3,))
        for class_ in range(len(classes_for_UNet)):
            for j in range(len(c)):
                if cc[j] == class_:
                    c_img = cv2.drawContours(c_img, c, j, cmap(class_)[:3] , thickness = 2)
        plt.figure(figsize=(20,20))
        plt.imshow(c_img)
        plt.axis('off')
        
        cc = [class_map_inv[x] for x in cc]

        #make a pandas frame with ids, basename,i,contours,means
        df = pd.DataFrame({'slide':slide,'img_number':i,'class':cc,'contour':c,'basename':basenames[i]})
        df['id']=[str(slide) + '-' + str(i)+'-'+str(x) for x in df.index.values]
        df.index = df.id
        #calculate background and means for all channels
        
        means = []
        means_stand = []
        means_norm = []
        
        img = ISHs[i]
        bg_msk = np.ones(img.shape[1:],dtype = np.uint8)
        
        for j in range(len(c)):
            msk = np.zeros(img.shape[1:],dtype = np.uint8)
            msk = cv2.drawContours(msk, c, j, (1) , cv2.FILLED)
            bg_msk = cv2.drawContours(bg_msk, c, j, (0) , cv2.FILLED)
            means.append(np.sum(img * msk,axis=(1,2))/np.sum(msk))
            
        bg = np.quantile((bg_msk * img).reshape(img.shape[0],-1),0.6,axis=1)[1:]
        means = pd.DataFrame(np.vstack(means)[:,1:])
        means.columns = modelprobes + probes
        means.index = df.id
        
        means_norm = (means-bg)/(means.max(axis=0)-bg)
        means_stand = pd.DataFrame(StandardScaler().fit_transform(means))
        means_stand.columns = modelprobes + probes
        means_stand.index = df.id
        
        means = pd.concat([df,means],axis=1)
        means_norm = pd.concat([df,means_norm],axis=1)
        means_stand = pd.concat([df,means_stand],axis=1)
        
        means_list.append(means)
        stand_list.append(means_stand)
        norm_list.append(means_norm)
    
    means = pd.concat(means_list)
    means_norm = pd.concat(norm_list)
    means_stand = pd.concat(stand_list)
    return ISHs, preds, binpreds, means, means_norm, means_stand
    


    
    
    
        
In [40]:
slide1_basenames = ['20190607C57BL6DxClass-1-1-1','20190607C57BL6DxClass-1-4-1','20190607C57BL6DxClass-1-6-1','20190607C57BL6DxClass-1-7-1']
slide1_probes = ['scn10a','scn11a','scn1a','scn8a']
ISHs1, preds1, binpreds1, means1, means_norm1, means_stand1 = process(slide1_basenames,slide1_probes,slide=1)
In [47]:
means1.shape
Out[47]:
(3294, 18)
In [46]:
#clusterlist = ['C1','C3','C4','C5','C6','C8','C9','C11','C12','C13']   
probes = ['scn10a','scn11a','scn1a','scn8a']

fig, axes = plt.subplots(ncols=1, nrows=len(probes), figsize=(21,60))

for i,probe in enumerate(probes):
    sns.violinplot(means_stand1['class'], means_stand1.loc[:,probe], ax=axes[i])
    axes[i].set_title(probe)
    #axes[i].set_xticklabels(clusterlist)
    
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
In [57]:
plt.figure(figsize=[20,15])
plt.subplot(3,1,1)

plt.hist(means_stand1.loc[:,'scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'all',color='grey')
plt.hist(means_stand1.loc[means_stand1['class']=='C3','scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'C3',color='black')
plt.title('C3')
plt.ylabel('relative cell number')
plt.xlabel('normalized scn10a expression level')

sns.despine()

plt.figure(figsize=[20,15])
plt.subplot(3,1,1)

plt.hist(means_stand1.loc[:,'scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'all',color='grey')
plt.hist(means_stand1.loc[means_stand1['class']=='C4','scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'C4',color='black')
plt.title('C4')
plt.ylabel('relative cell number')
plt.xlabel('normalized scn10a expression level')

sns.despine()

plt.figure(figsize=[20,15])
plt.subplot(3,1,1)

plt.hist(means_stand1.loc[:,'scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'all',color='grey')
plt.hist(means_stand1.loc[means_stand1['class']=='C5','scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'C5',color='black')
plt.title('C5')
plt.ylabel('relative cell number')
plt.xlabel('normalized scn10a expression level')

sns.despine()

plt.figure(figsize=[20,15])
plt.subplot(3,1,1)

plt.hist(means_stand1.loc[:,'scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'all',color='grey')
plt.hist(means_stand1.loc[means_stand1['class']=='C6','scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'C6',color='black')
plt.title('C6')
plt.ylabel('relative cell number')
plt.xlabel('normalized scn10a expression level')

sns.despine()

plt.figure(figsize=[20,15])
plt.subplot(3,1,1)

plt.hist(means_stand1.loc[:,'scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'all',color='grey')
plt.hist(means_stand1.loc[means_stand1['class']=='C8','scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'C8',color='black')
plt.title('C8')
plt.ylabel('relative cell number')
plt.xlabel('normalized scn10a expression level')

sns.despine()

plt.figure(figsize=[20,15])
plt.subplot(3,1,1)

plt.hist(means_stand1.loc[:,'scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'all',color='grey')
plt.hist(means_stand1.loc[means_stand1['class']=='C9','scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'C9',color='black')
plt.title('C9')
plt.ylabel('relative cell number')
plt.xlabel('normalized scn10a expression level')

sns.despine()

plt.figure(figsize=[20,15])
plt.subplot(3,1,1)

plt.hist(means_stand1.loc[:,'scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'all',color='grey')
plt.hist(means_stand1.loc[means_stand1['class']=='C13','scn10a'],bins=50,range=[-1.5,3.5],density=True,label = 'C13',color='black')
plt.title('C13')
plt.ylabel('relative cell number')
plt.xlabel('normalized scn10a expression level')

sns.despine()
In [110]:
scn5a_names = ['20190611C57BL6DxClass-2-41-3-scn5a40x-aligned','20190611C57BL6DxClass-2-42-3-scn5a40x-aligned','20190611C57BL6DxClass-2-43-3-scn5a40x-aligned']

for i,scn5a_name in enumerate(scn5a_names):
    cmap = colors.ListedColormap(['white', 'red','green','blue','magenta','cyan','gold','purple','tan','sienna','darkorange'])    

    # get the right contours
    df = means2[means2.basename == slide2_basenames[i+1]]
    c = df['contour']
    cc = df['class'].replace(classes_for_UNet)
    # get the image
    c_img = cv2.cvtColor(imread(os.path.join(DIR,scn5a_name+'.tif')),cv2.COLOR_GRAY2RGB)
    c_img
    for class_ in range(len(classes_for_UNet)):
        
        for j in range(len(c)):
            if cc[j] == class_:
                c_img = cv2.drawContours(c_img, c, j, [x * 255 for x in cmap(class_)[:3] ], thickness = 1)
    plt.figure(figsize=(20,20))
    plt.imshow(c_img)
    plt.axis('off')
In [99]:
img = cv2.cvtColor(imread(os.path.join(DIR,scn5a_names[0]+'.tif')),cv2.COLOR_GRAY2RGB)
img.shape
Out[99]:
(1024, 1024, 3)
In [58]:
slide2_basenames = ['20190607C57BL6DxClass-2-6-1','20190607C57BL6DxClass-2-9-1','20190607C57BL6DxClass-2-10-1','20190607C57BL6DxClass-2-11-1']
slide2_probes = ['scn10a','scn5a','scn7a','scn9a']
ISHs2, preds2, binpreds2, means2, means_norm2, means_stand2 = process(slide2_basenames,slide2_probes,slide=2)
In [60]:
#clusterlist = ['C1','C3','C4','C5','C6','C8','C9','C11','C12','C13']   
probes = ['scn10a','scn5a','scn7a','scn9a']

fig, axes = plt.subplots(ncols=1, nrows=len(probes), figsize=(21,60))

for i,probe in enumerate(probes):
    sns.violinplot(means_stand2['class'], means_stand2.loc[:,probe], ax=axes[i])
    axes[i].set_title(probe)
    #axes[i].set_xticklabels(clusterlist)
    
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
In [61]:
slide3_basenames = ['20190607C57BL6DxClass-3-4-1','20190607C57BL6DxClass-3-5-1','20190607C57BL6DxClass-3-6-1','20190607C57BL6DxClass-3-7-1']
slide3_probes = ['htr3a','scn1a','tac1','trpv1']
ISHs3, preds3, binpreds3, means3, means_norm3, means_stand3 = process(slide3_basenames,slide3_probes,slide=3)
In [62]:
stand_all = pd.concat([means_stand1,means_stand2,means_stand3],axis=0)
Out[62]:
basename calca class contour fxyd2 htr3a id img_number mrgprd nppb ... scn5a scn7a scn8a scn9a slide tac1 tmem233 trpa1 trpm8 trpv1
id
1-0-0 20190607C57BL6DxClass-1-1-1 -0.518791 C3 [[[423, 366]], [[422, 367]], [[422, 375]], [[4... 1.989910 NaN 1-0-0 0 0.099403 0.090108 ... NaN NaN -0.151354 NaN 1 NaN -0.682890 -0.217301 -0.309241 NaN
1-0-1 20190607C57BL6DxClass-1-1-1 -0.161979 C3 [[[519, 373]], [[518, 374]], [[518, 382]], [[5... 0.834002 NaN 1-0-1 0 -0.398419 0.553021 ... NaN NaN 0.123384 NaN 1 NaN -0.796367 -0.237519 -0.286504 NaN
1-0-2 20190607C57BL6DxClass-1-1-1 0.268074 C3 [[[460, 381]], [[459, 382]], [[459, 390]], [[4... 2.152419 NaN 1-0-2 0 -0.115025 0.517762 ... NaN NaN -0.162969 NaN 1 NaN -0.670567 -0.195791 0.202766 NaN
1-0-3 20190607C57BL6DxClass-1-1-1 0.445276 C3 [[[466, 382]], [[466, 387]], [[465, 388]], [[4... 1.489602 NaN 1-0-3 0 0.092142 0.459837 ... NaN NaN 0.214175 NaN 1 NaN -0.818674 -0.320976 0.257353 NaN
1-0-4 20190607C57BL6DxClass-1-1-1 0.164200 C3 [[[473, 383]], [[472, 384]], [[471, 384]], [[4... 1.649463 NaN 1-0-4 0 -0.052466 0.521120 ... NaN NaN -0.216670 NaN 1 NaN -0.483911 -0.231768 -0.187716 NaN

5 rows × 24 columns

In [63]:
tSNE_all = TSNE(n_components=2,random_state=1234).fit_transform(stand_all.loc[:,modelprobes])
stand_all['tSNE1'] = tSNE_all[:,0]
stand_all['tSNE2'] = tSNE_all[:,1]
In [65]:
with open(os.path.join(DIR,'stand_all.pickle'), "wb") as output_file:
    pickle.dump(stand_all, output_file)
In [66]:
#function for plotting expression levels of individual genes in tSNE projection
# function to plot tSNE

labels = stand_all['class']
clusters = list(set(labels))
cm = plt.get_cmap('tab20')
plt.figure(figsize = [15,12])
ax = plt.gca()
for i, c in enumerate(clusters):
    idx = labels == c
    plt.scatter(stand_all.loc[idx,'tSNE1'],stand_all.loc[idx,'tSNE2'],c=cm(i),label=c,s=50)
plt.legend(bbox_to_anchor=(1.0, 0.5),loc=3);
    
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
In [81]:
stand_all.shape[0]
Out[81]:
9859
In [82]:
np.sum(stand_all['class'] == 'C9')
Out[82]:
359
In [78]:
#function for plotting expression levels of individual genes in tSNE projection

def plot_gene_tSNE (df,probe,clipping=(0,1)):
    plt.set_cmap("inferno")
    df = df[df[probe].notnull()]
    plt.scatter(df.tSNE1,df.tSNE2,c=np.clip(df[probe],clipping[0],clipping[1]),s=5);
    plt.title(probe)
    ax = plt.gca()
    ax.set_facecolor([60/256,60/256,60/256,1])
In [89]:
plot_gene_tSNE(stand_all,'fxyd2',clipping=(-1.5,3.5))
In [91]:
# plot expression tSNE 
plt.set_cmap("inferno")
plt.figure(figsize = [20,20]);
probes = ['scn1a','scn5a','scn7a','scn8a','scn9a','scn10a','scn11a','htr3a','tac1']
for i,probe in enumerate(probes):

    plt.subplot(3,3,i+1);
    plot_gene_tSNE(stand_all,probe,clipping=(-1,3.5))    

# save figure if desired   
# plt.savefig('allprobes-tSNE-2.png')
<Figure size 432x288 with 0 Axes>
In [109]:
def get_9roi_pic(levels,contours,img,classes,size = 100,n=9,top=True):
    
    if top:
        idx = pd.Series(slide1_means_norm[:,9]).sort_values().tail(9).index.values
    else:
        idx = pd.Series(slide1_means_norm[:,9]).sort_values().head(9).index.values
 
    n_cols = int(np.ceil(np.sqrt(n)))
    n_rows = int(np.ceil(n/n_cols))
    
    
    # cx = m.m10 / m.m00;   cy = m.m01 / m.m00;
    # new rgb image 
    new_rgb = np.zeros([size*n_rows,size*n_cols,3])
    
    
    # for each line in df
    for i,(roi, row) in enumerate(df.iterrows()):
        
        # load image, 
        ish_stack = imread(os.path.join(DIR,'images',row.filename + '-adj.tif'))
        w = ish_stack.shape[2]
        h = ish_stack.shape[1]
        # blank blue channel for cell shape
        tmp_red = np.zeros([h,w])
        # load all rois from zip
        rois = read_roi_zip(os.path.join(DIR,'rois',row.filename + '-rois.zip'))
        if rois[roi]['type'] == 'traced' or rois[roi]['type'] == 'freehand':
            xs = rois[roi]['x']
            ys = rois[roi]['y']
            pts = np.array(list(zip(xs,ys))).reshape((-1,1,2))
            center_x = int((np.min(xs) + np.max(xs))/2)
            center_y = int((np.min(ys) + np.max(ys))/2)
            cv2.fillPoly(tmp_red,[pts],True,(255))
        elif rois[roi]['type'] == 'rectangle' :
            x1 = rois[roi]['left']
            y1 = rois[roi]['top']
            x2 = x1 + rois[roi]['width']
            y2 = y1 + rois[roi]['height']
            cv2.rectangle(tmp_red,(x1,y1),(x2,y2),(255),cv2.FILLED)
            center_x = int((x1+x2)/2)
            center_y = int((y1+y2)/2)
        
        elif rois[roi]['type'] == 'oval':
            x1 = rois[roi]['left']
            y1 = rois[roi]['top']
            width = rois[roi]['width']
            height = rois[roi]['height']
            center_x = x1 + int(round(width/2))
            center_y = y1 + int(round(height/2))
            cv2.ellipse(tmp_red,(center_x,center_y), (width,height), 0, 0, 180, (255), cv2.FILLED)
                
        else:
            print('error: ',rois[roi])
            
        
        x1 = min(max(int(center_x - size/2),0),w-size)
        x2 = x1 + size
        y1 = min(max(int(center_y - size/2),0),h-size)
        y2 = y1 + size
        
        tmp_red = tmp_red[y1:y2,x1:x2] * 255
        
        row = int(i/n_cols)
        col = int(i%n_cols)
        
        new_rgb[(row*size):((row+1)*size),(col*size):((col+1)*size),0] = tmp_red
        new_rgb[(row*size):((row+1)*size),(col*size):((col+1)*size),1] = ish_stack[ref_channel,y1:y2,x1:x2]

    
    new_rgb = new_rgb.astype(np.uint8)
    
    plt.figure(figsize=[20,20])
    plt.imshow(new_rgb)
    return new_rgb
rgb = get_9roi_pic(results.loc[raw_piezo[raw_piezo.cluster == 'C1'].sort_values('level').tail(10).index[:-1],['filename']],ref_channel=13)
In [306]:
rgb = np.zeros(slide2_ISHs[0].shape[1:] + (3,),dtype=np.uint8)
rgb[:,:,1] = slide2_ISHs[0][12]
green = np.zeros(slide2_ISHs[0].shape[1:])
for j in range(len(contours[0])):
    red = cv2.drawContours(green, contours[0], j, (255) , cv2.FILLED)
rgb[:,:,0] = red
plt.imshow(rgb)
imsave('scn9a-test.tif',rgb)

slide 3

In [272]:
slide3_preds = []
slide3_ISHs = []
slide3_preds.append(np.moveaxis(imread(os.path.join(DIR,'20190607C57BL6DxClass-3-7-1_pred.tif')),0,-1))
slide3_ISHs.append(imread(os.path.join(DIR,'20190607C57BL6DxClass-3-7-1.tif')))
probes = ['htr3a','scn1a','tac1','trpv1']
In [273]:
def binarize(preds):
    # function discretizes the predictions, i.e. for each pixel gives the cell class with the
    # highest probability, as well as some image cleanup
    
    # input are class prediction probabilities with dimensions height x width x (N_CLASSES + 1)
    
    # we take the maximum class for each pixel and encode it as a one-hot array, i.e.
    # we still have a stack of (N_CLASSES + 1) x height x width
    preds = np.argmax(preds,axis=-1)
    preds = (np.arange(preds.max()+1) == preds[...,None]).astype(np.uint8)
    # we then morphologically open all layers except the background layer (0)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6,6));
    for class_ in range(1,11):
        preds[:,:,class_] = cv2.morphologyEx(preds[:,:,class_], cv2.MORPH_OPEN, kernel)
        
    # we redefine the background layer as the leftover from all other layers   
    preds[:,:,0] = np.max(preds[:,:,1:],axis=-1) == 0
    
    
    return preds


# we make a list of binary predictions and fill it with the discretized 8 channel
# predictions
slide3_binpreds = []

for pred in slide3_preds:
    slide3_binpreds.append(binarize(pred))
In [274]:
#display colormaps for the discretized predictions
for i,bin_pred in enumerate(slide3_binpreds):
    colorpred = np.argmax(bin_pred,axis=-1)
    cmap = colors.ListedColormap(['white', 'red','green','blue','magenta','cyan','gold','purple','tan','sienna','darkorange'])    
    sm = matplotlib.cm.ScalarMappable(cmap=cmap)
    rgb = (sm.to_rgba(colorpred)[:,:,:3]* 255).astype(np.uint8) 
    plt.figure(figsize=(21,7))
    plt.imshow(rgb)
    plt.axis('off')
    
    #save figure if desired
    #imsave(os.path.join(DIR,'images',IMG_NAMES[i] + '-predcolor.tif'),rgb)

Distance Transform Watershed Segmentation to detect individual cells

In order to segment out individual cells from blobs of partially overlapping cells we perform a distance transform watershed segmentation and store the individual ROIs as OpenCV contours.

In [275]:
def get_contours(pred_bin, kernel = np.ones((2,2))):
    # function performs distance transform watershed segmentation on the individual
    # class layers of a discretized prediction
    
    # returns a list of the openCV contours of the cells and a list of the cell
    # classes (same order)
    contours = []
    classes = []
    
    # we segment each class layer separately
    for class_ in range(1,pred_bin.shape[-1]):
        # get single 2D image of the layer
        opening = pred_bin[:,:,class_]
        # calculate distance transform
        distance = ndi.distance_transform_edt(opening)
        # get local maxima as seeds for watershed
        local_maxi = peak_local_max(distance, indices=False, footprint=np.ones((8, 8)),
                                    labels=opening)
        # slightly dilate binary cells to give us an outline slightly outside the cells
        dilated = cv2.dilate(opening,kernel,iterations = 1)
        
        # make a label map by watershed segmentation
        markers = ndi.label(local_maxi)[0]
        labels = watershed(-distance, markers, mask=dilated)
        
        # translate labels to openCV contours and add class of cell to class list
        for i in range(1,labels.max()+1):
            _, contour,_ = cv2.findContours((labels == i).astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            contours.append(contour[0].astype(int))
            classes.append(class_)
            
    return contours, classes
In [276]:
# this is a list of lists (per image) of contours (per cell)
contours = []
# and a corresponding list of lists (per image) of cell class (per cell)
contour_classes = []
# specify dilation kernel for the segmentation function
kernel = np.array([[0,1,0],[1,1,1],[0,1,0]],dtype=np.uint8)

# segment all 8 discretized predictions
for bin_pred8 in slide3_binpreds:
    c, cc = get_contours(bin_pred8,kernel)
    contours.append(c)
    contour_classes.append(cc)
In [66]:
cmap = colors.ListedColormap(['white', 'red','green','blue','magenta','cyan','gold','purple','tan','sienna','darkorange'])    
Out[66]:
(1.0, 0.0, 0.0)
In [277]:
for i,img_contours in enumerate(contours): 
    img = np.zeros(slide1_ISHs[i].shape[1:] + (3,))
    for class_ in range(len(classes_for_UNet)):
        for j in range(len(img_contours)):
            if contour_classes[i][j] == class_:
                img = cv2.drawContours(img, img_contours, j, cmap(class_)[:3] , thickness = 2)
    
    
    
    
    
    plt.figure(figsize=(20,20))
    print(img.shape)
    plt.imshow(img)
    plt.axis('off')
    
(1024, 1024, 3)
In [278]:
# get all channels
slide3_means = []
slide3_bgs = []
for i,img in enumerate(slide3_ISHs):
    #img = img[9:]
    means = []
    bg_msk = np.ones(img.shape[1:],dtype = np.uint8)
    for j in range(len(contours[i])):
        msk = np.zeros(img.shape[1:],dtype = np.uint8)
        msk = cv2.drawContours(msk, contours[i], j, (1) , cv2.FILLED)
        bg_msk = cv2.drawContours(bg_msk, contours[i], j, (0) , cv2.FILLED)

        means.append(np.sum(img * msk,axis=(1,2))/np.sum(msk))
    bg = (bg_msk * img).reshape(img.shape[0],-1)
    
    slide3_means.append(np.vstack(means))
    slide3_bgs.append(np.quantile(bg,0.6,axis=1))
In [ ]:
 
In [ ]:
 
In [279]:
#normalize and standardize each img
#normalizing should be done between max value for a cell and the background

slide3_means_norm = []
slide3_means_stand = []
for i,means in enumerate(slide3_means):
    
    slide3_means_norm.append((means-slide3_bgs[i])/(means.max(axis=0)-slide3_bgs[i]))
    slide3_means_stand.append(StandardScaler().fit_transform(means))
slide3_means_norm = np.vstack(slide3_means_norm)    
slide3_means_stand = np.vstack(slide3_means_stand)    

contour_classes_all_3 = [item for sublist in contour_classes for item in sublist]
contour_classes_all_3 = [class_map_inv[x] for x in contour_classes_all_3]
In [280]:
means_stand_all = np.vstack([slide1_means_stand,slide2_means_stand,slide3_means_stand])
classes_all = [item for sublist in [contour_classes_all_1,contour_classes_all_2,contour_classes_all_3] for item in sublist]
In [281]:
tSNE_all = TSNE(n_components=2,random_state=1234).fit_transform(means_stand_all[:,1:9])
In [230]:
# function to plot tSNE
def plot_tsne(bg_sub_tsne,labels,clusters = [None]):
    labels = pd.Series(labels)
    if clusters == [None]:
        clusters = list(set(labels))
    cm = plt.get_cmap('tab20')
    plt.figure(figsize = [15,12])
    ax = plt.gca()
    for i, c in enumerate(clusters):
        idx = labels == c
        plt.scatter(bg_sub_tsne[idx,0],bg_sub_tsne[idx,1],c=cm(i),label=c,s=50)
    plt.legend(bbox_to_anchor=(1.0, 0.5),loc=3);
    
In [282]:
plot_tsne(tSNE_all,classes_all)
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'.  Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points.
In [283]:
#clusterlist = ['C1','C3','C4','C5','C6','C8','C9','C11','C12','C13']   

fig, axes = plt.subplots(ncols=1, nrows=len(probes), figsize=(21,60))

for i in range(len(probes)):
    sns.violinplot(contour_classes_all_3, slide3_means_norm[:,i+9], ax=axes[i])
    axes[i].set_title(probes[i])
    #axes[i].set_xticklabels(clusterlist)
    
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
In [ ]: